Make decision tree from iris data

Taken from Google's Visualizing a Decision Tree - Machine Learning Recipes #2


In [2]:
import tensorflow.contrib.learn as skflow
from sklearn.datasets import load_iris
from sklearn import metrics

In [58]:
iris = load_iris()

In [59]:
iris.keys()


Out[59]:
dict_keys(['DESCR', 'data', 'target_names', 'target', 'feature_names'])

In [60]:
iris.feature_names


Out[60]:
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [61]:
iris.target_names


Out[61]:
array(['setosa', 'versicolor', 'virginica'], 
      dtype='<U10')

In [62]:
# Withhold 3 for testing
test_idx = [0, 50, 100]

train_data = np.delete(iris.data, test_idx, axis=0)
train_target = np.delete(iris.target, test_idx)

In [63]:
test_target = iris.target[test_idx]  # array([0, 1, 2])

In [64]:
test_data = iris.data[test_idx]  # array([[ 5.1,  3.5,  1.4,  0.2], [ 7. ,  3.2,  4.7,  1.4], ...])

Deep neural network

3 layer deep neural network with 10, 20 and 10 hidden units in each layer, respectively.


In [19]:
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target)


Step #100, epoch #20, avg. train loss: 0.54416
Step #200, epoch #40, avg. train loss: 0.14521
Out[19]:
TensorFlowDNNClassifier(batch_size=32, class_weight=None, clip_gradients=5.0,
            config=None, continue_training=False, dropout=None,
            hidden_units=[10, 20, 10], learning_rate=0.1, n_classes=3,
            optimizer='Adagrad', steps=200, verbose=1)

In [20]:
metrics.accuracy_score(iris.target, classifier.predict(iris.data))


Out[20]:
0.93999999999999995

Custom model with TensorFlowEstimator()


In [29]:
def my_model(X, y):
    """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.5 probability."""
    layers = skflow.ops.dnn(X, [10, 20, 10])  # keep_prob=0.5 causes error
    return skflow.models.logistic_regression(layers, y)

In [30]:
classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
classifier.fit(iris.data, iris.target)


Step #100, epoch #20, avg. train loss: 0.54416
Step #200, epoch #40, avg. train loss: 0.14521
Out[30]:
TensorFlowEstimator(batch_size=32, class_weight=None, clip_gradients=5.0,
          config=None, continue_training=False, learning_rate=0.1,
          model_fn=<function my_model at 0x10fa567b8>, n_classes=3,
          optimizer='Adagrad', steps=200, verbose=1)

In [32]:
metrics.accuracy_score(iris.target, classifier.predict(iris.data))


Out[32]:
0.93999999999999995

In [49]:
classifier = skflow.TensorFlowRNNClassifier(rnn_size=2, n_classes=15)

In [50]:
classifier.fit(iris.data, iris.target)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-50-a44f5247af37> in <module>()
----> 1 classifier.fit(iris.data, iris.target)

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/base.py in fit(self, X, y, monitor, logdir)
    225         if not self.continue_training or not self._initialized:
    226             # Sets up model and trainer.
--> 227             self._setup_training()
    228             self._initialized = True
    229         else:

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/base.py in _setup_training(self)
    146             # Create model's graph.
    147             self._model_predictions, self._model_loss = self.model_fn(
--> 148                 self._inp, self._out)
    149 
    150             # Set up a single operator to merge all the summaries

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/rnn.py in _model_fn(self, X, y)
     95                                     models.logistic_regression,
     96                                     self.sequence_length,
---> 97                                     self.initial_state)(X, y)
     98 
     99     @property

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/models.py in rnn_estimator(X, y)
    335             _, encoding = nn.rnn(cell, X, dtype=dtypes.float32,
    336                                     sequence_length=sequence_length,
--> 337                                     initial_state=initial_state)
    338         return target_predictor_fn(encoding, y)
    339     return rnn_estimator

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py in rnn(cell, inputs, initial_state, dtype, sequence_length, scope)
     86     raise TypeError("cell must be an instance of RNNCell")
     87   if not isinstance(inputs, list):
---> 88     raise TypeError("inputs must be a list")
     89   if not inputs:
     90     raise ValueError("inputs must not be empty")

TypeError: inputs must be a list

In [56]:
import numpy as np
from tensorflow.contrib.learn.python import learn
import tensorflow as tf

np.random.seed(42)
data = np.array(
    list([[2, 1, 2, 2, 3], [2, 2, 3, 4, 5], [3, 3, 1, 2, 1], [2, 4, 5, 4, 1]
         ]),
    dtype=np.float32)
# labels for classification
labels = np.array(list([1, 0, 1, 0]), dtype=np.float32)
# targets for regression
targets = np.array(list([10, 16, 10, 16]), dtype=np.float32)
test_data = np.array(list([[1, 3, 3, 2, 1], [2, 3, 4, 5, 6]]))

def input_fn(X):
  return tf.split(1, 5, X)

# Classification
classifier = learn.TensorFlowRNNClassifier(rnn_size=2,
                                           cell_type="lstm",
                                           n_classes=2,
                                           input_op_fn=input_fn)
classifier.fit(data, labels)
classifier.weights_
classifier.bias_
predictions = classifier.predict(test_data)
#assertAllClose(predictions, np.array([1, 0]))

classifier = learn.TensorFlowRNNClassifier(rnn_size=2,
                                           cell_type="rnn",
                                           n_classes=2,
                                           input_op_fn=input_fn,
                                           num_layers=2)
classifier.fit(data, labels)


Out[56]:
TensorFlowRNNClassifier(batch_size=32, bidirectional=False, cell_type='rnn',
            class_weight=None, clip_gradients=5.0, config=None,
            continue_training=False, initial_state=None,
            input_op_fn=<function input_fn at 0x1100fe2f0>,
            learning_rate=0.1, n_classes=2, num_layers=2,
            optimizer='Adagrad', rnn_size=2, sequence_length=None,
            steps=50, verbose=1)

In [66]:
classifier.predict(iris.data)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-66-329a9c26d74b> in <module>()
----> 1 classifier.predict(iris.data)

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/base.py in predict(self, X, axis, batch_size)
    325             value.
    326         """
--> 327         return self._predict(X, axis=axis, batch_size=batch_size)
    328 
    329     def predict_proba(self, X, batch_size=None):

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/base.py in _predict(self, X, axis, batch_size)
    297             predictions_for_batch = self._session.run(
    298                 self._model_predictions,
--> 299                 feed_dict)
    300             if self.n_classes > 1 and axis != -1:
    301                 preds.append(predictions_for_batch.argmax(axis=axis))

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    338     try:
    339       result = self._run(None, fetches, feed_dict, options_ptr,
--> 340                          run_metadata_ptr)
    341       if run_metadata:
    342         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/kyle/cltk/venv/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    551                 'Cannot feed value of shape %r for Tensor %r, '
    552                 'which has shape %r'
--> 553                 % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
    554           if not self.graph.is_feedable(subfeed_t):
    555             raise ValueError('Tensor %s may not be fed.' % subfeed_t)

ValueError: Cannot feed value of shape (32, 4) for Tensor 'input:0', which has shape '(?, 5)'

In [ ]: